# ------------------------------------------------------------------------------------------------
### Load Data
# ------------------------------------------------------------------------------------------------
# Load the processed LGBTQ election data
load("../data/processed/LGBTQ_election_data.Rda")

# ------------------------------------------------------------------------------------------------
### Prepare Analyses:
# - Exclude Warsaw for all subsequent analyses
# - Define function to calculate sDiD estimates and standard errors
# - Define helper function to run analyses for different samples
# - Create lists of outcome variables
# ------------------------------------------------------------------------------------------------

# Exclude Warsaw for all subsequent analyses
LGBTQ_election_data <- LGBTQ_election_data %>%
  filter(id_county != 1465)

# Create cutoff sample
LGBTQ_election_data_cutoff <- create_cutoff_sample(LGBTQ_election_data)

# Function to calculate SDID estimates and standard errors
calculate_sdid <- function(data, var) {
  sc_did <- data %>% 
    mutate(treat = as.integer(treat)) %>% 
    select(id, eyear, !!sym(var), treat)
  n_obs <- nrow(sc_did %>% filter(!is.na(id) & !is.na(eyear) & !is.na(treat) & !is.na(!!sym(var)))) 
  setup <- panel.matrices(as.data.frame(as_tibble(sc_did)))
  estimates <- synthdid_estimate(setup$Y, setup$N0, setup$T0)
  ste <- sqrt(vcov(estimates, method='jackknife'))
  return(list(estimate = estimates[1], ste = ste[1], n_obs = n_obs))
}

# Helper function to run analyses for different samples
run_analysis_sample <- function(data, sample_name, party_share_list, party_turnout_list) {
  # Run party share analyses
  party_share <- lapply(names(party_share_list), function(var) {
    calculate_sdid(data, var)
  })
  names(party_share) <- names(party_share_list)
  
  # Run party turnout analyses
  party_turnout <- lapply(names(party_turnout_list), function(var) {
    calculate_sdid(data, var)
  })
  names(party_turnout) <- names(party_turnout_list)
  
  return(list(party_share = party_share, party_turnout = party_turnout))
}

# create lists of outcome variables
party_share_list <- list(pis=NULL, po=NULL, psl=NULL, sld=NULL, non_pis=NULL)
party_turnout_list <- list(pis_support=NULL, po_support=NULL, psl_support=NULL, 
                          sld_support=NULL, non_pis_support=NULL, turnout=NULL)


# ------------------------------------------------------------------------------------------------
### Main Analyses: 
# - Effects on overall, opposition, and government turnout (Figure 2, Table 2)
# - Effects on Vote Shares and Turnout by Party (Figure 10, Table 3)
# ------------------------------------------------------------------------------------------------

# Main sample analyses
main_results <- run_analysis_sample(LGBTQ_election_data, "Full Sample", party_share_list, party_turnout_list)
cutoff_results <- run_analysis_sample(LGBTQ_election_data_cutoff, "< 50km Sample", party_share_list, party_turnout_list)

# ------------------------------------------------------------------------------------------------
### Combining Levels of Government:
# - Effects on overall, opposition, and government turnout by sum of treatment levels (Figure 11, Table 4)
# ------------------------------------------------------------------------------------------------

# Create filtered datasets for different treatment levels
level_filters <- list(
  one_level = list(filter_expr = quote(sum_count_any_type == 1 | any_level_any_type == 0)),
  two_level = list(filter_expr = quote(sum_count_any_type == 2 | any_level_any_type == 0)),
  three_level = list(filter_expr = quote(sum_count_any_type == 3 | any_level_any_type == 0))
)

level_results <- lapply(names(level_filters), function(level_name) {
  filtered_data <- LGBTQ_election_data %>%
    filter(!!level_filters[[level_name]]$filter_expr)
  run_analysis_sample(filtered_data, paste("Level", level_name), party_share_list, party_turnout_list)
})
names(level_results) <- names(level_filters)

# ------------------------------------------------------------------------------------------------
### Combining Resolutions Types:
# - Effects on overall, opposition, and government turnout by sum of resolutions types 
#   (Figure 12, Table 5)
# ------------------------------------------------------------------------------------------------

# Create filtered datasets for different resolution types
type_filters <- list(
  one_type = list(filter_expr = quote(sum_count_any_level == 1 | any_level_any_type == 0)),
  two_type = list(filter_expr = quote(sum_count_any_level == 2 | any_level_any_type == 0))
)

type_results <- lapply(names(type_filters), function(type_name) {
  filtered_data <- LGBTQ_election_data %>%
    filter(!!type_filters[[type_name]]$filter_expr)
  run_analysis_sample(filtered_data, paste("Type", type_name), party_share_list, party_turnout_list)
})
names(type_results) <- names(type_filters)

# ------------------------------------------------------------------------------------------------
### Levels of Government:
# - Effects on overall, opposition, and government turnout separated by government unit passing 
#   the resolution (Figure 14, Table 6)
# ------------------------------------------------------------------------------------------------

# Create filtered datasets for different government levels
gov_level_filters <- list(
  province = list(filter_expr = quote(province_any_type == 1 | any_level_any_type == 0)),
  county = list(filter_expr = quote(county_any_type == 1 | any_level_any_type == 0)),
  municipality = list(filter_expr = quote(municipality_any_type == 1 | any_level_any_type == 0))
)

gov_level_results <- lapply(names(gov_level_filters), function(gov_name) {
  filtered_data <- LGBTQ_election_data %>%
    filter(!!gov_level_filters[[gov_name]]$filter_expr)
  run_analysis_sample(filtered_data, paste(gov_name, "Level"), party_share_list, party_turnout_list)
})
names(gov_level_results) <- names(gov_level_filters)

# ------------------------------------------------------------------------------------------------
### Resolutions Types:
# - Effects on overall, opposition, and government turnout separated by resolution type 
#   (Figure 15, Table 7)
# ------------------------------------------------------------------------------------------------

# Create filtered datasets for different resolution types
resolution_type_filters <- list(
  type1 = list(filter_expr = quote(any_level_type_1 == 1 | any_level_any_type == 0)),
  type2 = list(filter_expr = quote(any_level_type_2 == 1 | any_level_any_type == 0))
)

resolution_type_results <- lapply(names(resolution_type_filters), function(res_type_name) {
  filtered_data <- LGBTQ_election_data %>%
    filter(!!resolution_type_filters[[res_type_name]]$filter_expr)
  run_analysis_sample(filtered_data, paste("Resolution Type", res_type_name), party_share_list, party_turnout_list)
})
names(resolution_type_results) <- names(resolution_type_filters)

# ------------------------------------------------------------------------------------------------
### Never-Treated Synthetic Control Group:
# - Effects on overall, opposition, and government turnout for never treated control 
#   (Figure 16, Table 8)
# ------------------------------------------------------------------------------------------------

# Create never-treated control datasets
nevertreated_control <- LGBTQ_election_data %>%
  filter(wave_id %in% c(0,1))

nevertreated_control_cutoff <- LGBTQ_election_data_cutoff %>%
  filter(wave_id %in% c(0,1))

nevertreated_results <- run_analysis_sample(nevertreated_control, "Never Treated", party_share_list, party_turnout_list)
nevertreated_cutoff_results <- run_analysis_sample(nevertreated_control_cutoff, "Never Treated Cutoff", party_share_list, party_turnout_list)

# ------------------------------------------------------------------------------------------------
### Permutation-Based Test of sDiD Estimates:
# - Estimates from 10,000 simulations that randomly assigned municipalities to treatment
#   (Figure 17)
# ------------------------------------------------------------------------------------------------
# Estimate the main ATTs to have a benchmark ----
df0 <- LGBTQ_election_data %>%
  mutate(
    post  = as.integer(eyear >= 2019),
    treat = as.integer(treat)
  )

outcomes <- c("turnout", "pis_support", "non_pis_support")

# Set up the simulation ----
unit_ids <- unique(df0$id)
N        <- length(unit_ids)
m        <- sum(df0$treat == 1)
B        <- 10000

# Run simulation & store estimates ----
pb <- progress::progress_bar$new(total=B, format="  Simulations [:bar] :percent eta: :eta")

simulations <- map_dfr(seq_len(B), function(b) {
  pb$tick()
  
  # draw and merge
  assignment_vec <- complete_ra(N=N, m=m)
  assignment_df  <- tibble(id = unit_ids,
                           treat_new = assignment_vec)
  
  df1 <- df0 %>%
    left_join(assignment_df, by = "id") %>%
    mutate(treat_perm = if_else(post==1 & treat_new==1, 1L, 0L))
  
  # estimate each outcome, return a 1-row tibble
  map_dfr(outcomes, function(var) {
    pm  <- panel.matrices(
      as.data.frame(df1 %>% select(id, eyear, all_of(var), treat_perm)),
      1,2,3,4
    )
    est <- synthdid_estimate(pm$Y, pm$N0, pm$T0)[1]
    tibble(
      draw    = b,
      outcome = recode(var,
                       turnout      = "Overall Turnout",
                       pis_support  = "Government Turnout",
                       non_pis_support = "Opposition Turnout"),
      att      = est
    )
  })
}, .id = NULL)

# Save the simulation results
save(simulations, file="../data/results/simulations.Rda")

# ------------------------------------------------------------------------------------------------
### Municipalities <300,000 Inhabitants:
# - Effects on overall, opposition, and government turnout for municipalities <300,000 inhabitants 
#   (Figure 18, Table 10)
# ------------------------------------------------------------------------------------------------

# Cities to exclude (>300k inhabitants)
exclude_cities <- c("1465", # Warsaw
                 "1061", # Lodz
                 "1261", # Krakow
                 "0264", # Wroclaw
                 "3064", # Poznan
                 "2261", # Gdansk
                 "3262", # Szczecin
                 "0461", # Bydgoszcz
                 "0663", # Lublin
                 "2469") # Katowice
              
no300k_data <- LGBTQ_election_data %>%
  filter(!(id_county %in% exclude_cities))

no300k_cutoff_data <- LGBTQ_election_data_cutoff %>%
  filter(!(id_county %in% exclude_cities))

no300k_results <- run_analysis_sample(no300k_data, "No cities >300k", party_share_list, party_turnout_list)
no300k_cutoff_results <- run_analysis_sample(no300k_cutoff_data, "No cities >300k Cutoff", party_share_list, party_turnout_list)

# ------------------------------------------------------------------------------------------------
### Excluding Proposed but not Adopted Resolutions:
# - Effects on overall, opposition, and government turnout without municipalities proposing but not adopting 
#   (Figure 19, Table 11)
# ------------------------------------------------------------------------------------------------

# Create datasets excluding proposed but not adopted resolutions
noproposed_data <- LGBTQ_election_data %>%
  filter(any_level_proposed != 1)

noproposed_cutoff_data <- LGBTQ_election_data_cutoff %>%
  filter(any_level_proposed != 1)

noproposed_results <- run_analysis_sample(noproposed_data, "No Proposed", party_share_list, party_turnout_list)
noproposed_cutoff_results <- run_analysis_sample(noproposed_cutoff_data, "No Proposed Cutoff", party_share_list, party_turnout_list)


# ------------------------------------------------------------------------------------------------
### Combine all results:
# - Combine all results into a single dataset and save under processed_data/sdid_results.Rda
# ------------------------------------------------------------------------------------------------

# Helper function to extract results from analysis objects
extract_results <- function(results_list, sample_name, sum_level = "Any Level", sum_type = "Any Type", 
                           level = "Any Level", type = "Any Type", control_sample = "notyettreated",
                           no300k_sample = "Full Sample", noproposed_sample = "Full Sample") {
  
  # Extract party share results
  party_share_data <- lapply(names(party_share_list), function(party) {
    result <- results_list$party_share[[party]]
    tibble(
      party = party,
      outcome = "Vote Share",
      estimate = result$estimate,
      se = result$ste,
      n_obs = result$n_obs
    )
  }) %>% bind_rows()
  
  # Extract party turnout results
  party_turnout_data <- lapply(names(party_turnout_list), function(party) {
    result <- results_list$party_turnout[[party]]
    tibble(
      party = party,
      outcome = if(party == "turnout") "Overall Turnout" else "Party Turnout",
      estimate = result$estimate,
      se = result$ste,
      n_obs = result$n_obs
    )
  }) %>% bind_rows()
  
  # Combine and add metadata
  bind_rows(party_share_data, party_turnout_data) %>%
    mutate(
      sample = sample_name,
      sum_level = sum_level,
      sum_type = sum_type,
      level = level,
      type = type,
      control_sample = control_sample,
      no300k_sample = no300k_sample,
      noproposed_sample = noproposed_sample
    )
}

# Combine all results
all_results <- bind_rows(
  extract_results(main_results, "Full Sample"),
  extract_results(cutoff_results, "< 50km Sample"),
  extract_results(level_results$one_level, "Full Sample", sum_level = "One Level"),
  extract_results(level_results$two_level, "Full Sample", sum_level = "Two Levels"),
  extract_results(level_results$three_level, "Full Sample", sum_level = "Three Levels"),
  extract_results(type_results$one_type, "Full Sample", sum_type = "One Type"),
  extract_results(type_results$two_type, "Full Sample", sum_type = "Two Types"),
  extract_results(gov_level_results$province, "Full Sample", level = "Province Level"),
  extract_results(gov_level_results$county, "Full Sample", level = "County Level"),
  extract_results(gov_level_results$municipality, "Full Sample", level = "Municipality Level"),
  extract_results(resolution_type_results$type1, "Full Sample", type = "Resolution against LGBT ideology"),
  extract_results(resolution_type_results$type2, "Full Sample", type = "Charter of the Rights of the Family"),
  extract_results(nevertreated_results, "Full Sample", control_sample = "nevertreated"),
  extract_results(nevertreated_cutoff_results, "< 50km Sample", control_sample = "nevertreated"),
  extract_results(no300k_results, "Full Sample", no300k_sample = "No cities >300k"),
  extract_results(no300k_cutoff_results, "< 50km Sample", no300k_sample = "No cities >300k"),
  extract_results(noproposed_results, "Full Sample", noproposed_sample = "No Proposed"),
  extract_results(noproposed_cutoff_results, "< 50km Sample", noproposed_sample = "No Proposed")
)

# Clean up party names and add confidence intervals
sdid_results <- all_results %>%
  mutate(
    party = case_when(
      party %in% c("pis", "pis_support") ~ "Law and Justice (PiS)",
      party %in% c("po", "po_support") ~ "Civic Coalition (KO)",
      party %in% c("psl", "psl_support") ~ "Polish Coalition (KP)",
      party %in% c("sld", "sld_support") ~ "The Left (Lewica)",
      party %in% c("non_pis", "non_pis_support") ~ "Opposition",
      party == "turnout" ~ "Overall Turnout",
      TRUE ~ party
    ),
    # Calculate confidence intervals
    lower_cb_99 = estimate - 2.576 * se,
    upper_cb_99 = estimate + 2.576 * se,
    lower_cb_95 = estimate - 1.96 * se,
    upper_cb_95 = estimate + 1.96 * se,
    lower_cb_90 = estimate - 1.645 * se,
    upper_cb_90 = estimate + 1.645 * se
  ) %>%
  # Ensure numeric columns
  mutate(across(c(estimate, starts_with("lower_cb"), starts_with("upper_cb")), as.numeric))

save(sdid_results, file="../data/results/sdid_results.Rda")